# IPython auto-reload: edits to src/ modules take effect without a kernel restart.
%load_ext autoreload
%autoreload 2
import os
# Project code root and data root.
# NOTE(review): both constants point to the same directory — confirm
# MOMAPS_DATA_HOME is not meant to be a separate data volume.
MOMAPS_HOME = '/home/labs/hornsteinlab/Collaboration/MOmaps'
MOMAPS_DATA_HOME = '/home/labs/hornsteinlab/Collaboration/MOmaps'
# Merged preprocessing logs (QC input) and the destination folder for QC plots.
LOGS_PATH = os.path.join(MOMAPS_DATA_HOME, "outputs/preprocessing/spd/logs/preprocessing_FUS/merged")
PLOT_PATH = os.path.join(MOMAPS_DATA_HOME, "outputs/preprocessing/spd/logs/preprocessing_FUS/plots")
# Run from the repo root so the src.* imports below resolve.
os.chdir(MOMAPS_HOME)
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
plt.rcParams["image.cmap"] = "Set1"
from src.common.lib.preprocessing_utils import rescale_intensity
from src.common.lib.images_qc import *
import contextlib
import io
import matplotlib
import warnings
# Silence pandas chained-assignment warnings raised inside the QC helpers.
warnings.filterwarnings('ignore', category=pd.core.common.SettingWithCopyWarning)
from src.common.lib.qc_config_tmp import *
from src.common.lib.image_sampling_utils import *
from matplotlib.colors import LinearSegmentedColormap
# Display the pandas version (notebook cell output).
pd.__version__
'1.3.5'
# Load the merged preprocessing logs for all batches (including non-WT
# conditions), then split the rows into the DAPI (nucleus) channel vs. all
# other target markers.
df = log_files_qc(LOGS_PATH, only_wt_cond=False)
is_dapi = df['marker'].eq('DAPI')
df_dapi = df[is_dapi]
df_target = df[~is_dapi]
reading logs of batch1
/home/labs/hornsteinlab/sagyk/.local/lib/python3.7/site-packages/ipykernel_launcher.py:1: DtypeWarning: Columns (8,12) have mixed types.Specify dtype option on import or set low_memory=False. """Entry point for launching an IPython kernel.
Total of 1 files were read. Before dup handeling (182500, 22) After duplication removal #1: (182500, 23) After duplication removal #2: (174862, 23)
# QC a single batch for now; widen the list to cover more batches, e.g.
# [f'batch{i}' for i in range(2, 6)].
batches = ['batch1']
batches
['batch1']
# Validate the on-disk folder structure of the RAW images (second arg False →
# raw-mode checks) for the selected batches.
root_directory_raw = os.path.join(MOMAPS_DATA_HOME, 'input', 'images', 'raw',
                                  'SpinningDisk', 'FUS_lines_stress_2024_sorted')
# Raw batch folders are named without the 16-bit/no-downsample suffix.
batches_raw = [b.replace("_16bit_no_downsample", "") for b in batches]
# fus_markers is copied — presumably the validator mutates the list (TODO confirm).
raws = run_validate_folder_structure(
    root_directory_raw, False, fus_panels, fus_markers.copy(), PLOT_PATH,
    fus_marker_info, fus_cell_lines_to_cond, reps, fus_cell_lines_for_disp,
    fus_expected_dapi_raw, batches=batches_raw, fig_width=15)
batch1 Folder structure is valid. No bad files are found. Total Sites: 203999
======== ====================
# Validate the folder structure of the PROCESSED images (second arg True →
# processed-mode checks) for the same batches.
root_directory_proc = os.path.join(MOMAPS_DATA_HOME, 'input', 'images', 'processed', 'spd2',
                                   'SpinningDisk', 'FUS_lines_stress_2024_sorted')
procs = run_validate_folder_structure(
    root_directory_proc, True, fus_panels, fus_markers, PLOT_PATH, fus_marker_info,
    fus_cell_lines_to_cond, reps, fus_cell_lines_for_disp, fus_expected_dapi_raw,
    batches=batches, fig_width=15)
batch1 Folder structure is valid. No bad files are found. Total Sites: 180993
======== ====================
# Visualize raw-vs-processed site-count differences per batch, using the two
# validation results computed above.
display_diff(batches, raws, procs, PLOT_PATH, fig_width=15)
batch1
========
# Estimate a per-batch intensity variance from a sample of processed sites,
# suppressing the helper's verbose stdout and printing only the final value.
# (Previously also ran over batches 3-8: list(range(3,9)) + ['7_16bit','8_16bit','9_16bit'].)
for batch in batches:
    with contextlib.redirect_stdout(io.StringIO()):
        var = sample_and_calc_variance(
            root_directory_proc, batch,
            sample_size_per_markers=200, cond_count=2, rep_count=len(reps),
            # NOTE(review): dnls_markers is used here although this is a FUS
            # notebook — confirm this marker count is intended.
            num_markers=len(dnls_markers))
    print(f'{batch} var: ', var)
batch1 var: 0.0165048514217068
By order of filtering
Percentage out of the total sites
# Filter 1: site survival of DAPI sites under the Brenner focus measure.
# The result is passed as the prior-filter state to the next filtering step.
dapi_filter_by_brenner = show_site_survival_dapi_brenner(df_dapi,batches, fus_line_colors, fus_panels,
figsize=(10,5))
Percentage out of the sites that passed the previous filter. In parentheses are absolute values.
A site will be filtered out if Cellpose found 0 cells in it.
# Filter 2: DAPI sites where Cellpose found 0 cells are dropped; chained after
# the Brenner filter via dapi_filter_by_brenner.
dapi_filter_by_cellpose = show_site_survival_dapi_cellpose(df_dapi, batches, dapi_filter_by_brenner,
fus_line_colors, fus_panels, figsize=(15,5))
Percentage out of the sites that passed the previous filter. In parenthesis are absolute values.
A site will be filtered out if, after tiling, no tile contains at least 85% of a cell that Cellpose detected.
# Filter 3: DAPI sites with no tile containing at least 85% of a detected cell
# are dropped; chained after the Cellpose filter.
dapi_filter_by_tiling=show_site_survival_dapi_tiling(df_dapi, batches, dapi_filter_by_cellpose,
fus_line_colors, fus_panels, figsize=(15,5))
Percentage out of the sites that passed the previous filter. In parentheses are absolute values (if different from the percentages).
# Filter 4: Brenner focus filter applied to the non-DAPI (target-marker) sites,
# reported relative to the DAPI sites that survived the tiling filter.
# Display-only: no variable is captured.
show_site_survival_target_brenner(df_dapi, df_target, dapi_filter_by_tiling,
figsize=(15,8), markers=fus_markers)
# Human-readable labels matching stats[:2]; only used by the (currently
# disabled) heatmap loop below.
names = ['Total number of tiles', 'Total number of whole cells']
# Per-site count columns to aggregate across target and DAPI frames.
stats = ['n_valid_tiles','site_whole_cells_counts_sum','site_cell_count','site_cell_count_sum']
total_sum = calc_total_sums(df_target, df_dapi, stats)
# for stat, name in zip(stats[:2], names):
#     to_heatmap = total_sum.rename(columns={stat:'index'})
#     plot_filtering_heatmap(to_heatmap, extra_index='marker', vmin=None, vmax=None,
#                            xlabel = name, show_sum=True, figsize=(4,8))
# Display the column-wise sums (notebook cell output).
total_sum.sum()
batch batch1batch1batch1batch1batch1batch1batch1batc... cell_line_cond FUSHeterozygous BMAAFUSHeterozygous BMAAFUSHet... rep rep1rep2rep1rep2rep1rep2rep1rep2rep1rep2rep1re... panel panelApanelApanelApanelApanelApanelApanelApane... n_valid_tiles 661072 site_whole_cells_counts_sum 694989.0 site_cell_count 1897547.0 site_cell_count_sum 3160566.0 marker G3BP1G3BP1G3BP1G3BP1G3BP1G3BP1G3BP1G3BP1G3BP1G... dtype: object
# Render summary tables (per-batch and all-batches) of tile/cell-count totals.
show_total_sum_tables(total_sum)
| n_valid_tiles | % valid tiles | site_whole_cells_counts_sum | site_cell_count | |
|---|---|---|---|---|
| batch1 | ||||
| count | 1286.000000 | 1286.000000 | 1286.000000 | 1.286000e+03 |
| mean | 514.052877 | 5.140529 | 540.426905 | 1.475542e+03 |
| std | 297.924565 | 2.979246 | 317.249734 | 8.903212e+02 |
| min | 6.000000 | 0.060000 | 6.000000 | 7.000000e+00 |
| 25% | 275.000000 | 2.750000 | 284.000000 | 7.412500e+02 |
| 50% | 457.000000 | 4.570000 | 479.000000 | 1.366500e+03 |
| 75% | 755.500000 | 7.555000 | 802.750000 | 2.202500e+03 |
| max | 1704.000000 | 17.040000 | 1795.000000 | 5.204000e+03 |
| sum | 661072.000000 | NaN | 694989.000000 | 1.897547e+06 |
| expected_count | 450.000000 | 450.000000 | 450.000000 | 4.500000e+02 |
| n valid tiles | % valid tiles | site_whole_cells_counts_sum | site_cell_count | |
|---|---|---|---|---|
| All batches | ||||
| count | 1286.000000 | 1286.000000 | 1286.000000 | 1.286000e+03 |
| mean | 514.052877 | 5.140529 | 540.426905 | 1.475542e+03 |
| std | 297.924565 | 2.979246 | 317.249734 | 8.903212e+02 |
| min | 6.000000 | 0.060000 | 6.000000 | 7.000000e+00 |
| 25% | 275.000000 | 2.750000 | 284.000000 | 7.412500e+02 |
| 50% | 457.000000 | 4.570000 | 479.000000 | 1.366500e+03 |
| 75% | 755.500000 | 7.555000 | 802.750000 | 2.202500e+03 |
| max | 1704.000000 | 17.040000 | 1795.000000 | 5.204000e+03 |
| sum | 661072.000000 | NaN | 694989.000000 | 1.897547e+06 |
| expected_count | 450.000000 | 450.000000 | 450.000000 | 4.500000e+02 |
# Drop DAPI sites where tiling produced no valid tiles, then plot the per-site
# average of each cell-count metric.
df_no_empty_sites = df_dapi[df_dapi['n_valid_tiles'] != 0]
for metric, plot_title in [
        ('site_cell_count_sum', 'Cell Count Average per Site (from tiles)'),
        ('site_whole_cells_counts_sum', 'Whole Cell Count Average per Site'),
        ('site_cell_count', 'Cellpose Cell Count Average per Site')]:
    plot_cell_count(df_no_empty_sites, fus_lines_order, fus_custom_palette,
                    y=metric, title=plot_title)
# Distribution of valid-tile counts per rep/condition, restricted to batch 1
# (batch_min=batch_max=1), followed by a rep-split heatmap of
# cell-line-condition x panel coverage.
plot_catplot(df_dapi, fus_custom_palette,reps, x='n_valid_tiles', x_title='valid tiles count', batch_min=1, batch_max=1)
plot_hm(df_dapi, split_by='rep', rows='cell_line_cond', columns='panel')
# Plot raw-vs-processed intensity histograms for each batch.
# NOTE(review): this call currently raises FileNotFoundError — the processed
# images path gets mangled inside run_calc_hist_new ('..._sorted' becomes
# '...ed' in the traceback below); verify the folder-name handling before
# rerunning.
for batch in batches:
    print(batch)
    # batch_num = batch.replace('batch',"")
    run_calc_hist_new(f'FUS_lines_stress_2024_sorted/{batch}', fus_cell_lines_for_disp,
                      fus_markers, hist_sample=10, sample_size_per_markers=200,
                      ncols=8, nrows=4, dnls=True)
    print("="*30)
batch1
--------------------------------------------------------------------------- FileNotFoundError Traceback (most recent call last) /tmp/ipykernel_53633/1466877225.py in <module> 3 #batch_num = batch.replace('batch',"") 4 run_calc_hist_new(f'FUS_lines_stress_2024_sorted/{batch}', fus_cell_lines_for_disp, fus_markers, ----> 5 hist_sample=10,sample_size_per_markers=200, ncols=8, nrows=4, dnls=True) 6 print("="*30) /home/labs/hornsteinlab/Collaboration/MOmaps/src/common/lib/images_qc.py in run_calc_hist_new(batch, cell_lines_for_disp, markers, hist_sample, sample_size_per_markers, ncols, nrows, rep_count, cond_count, dnls) 923 raw=True, all_conds=False, rep_count=rep_count, cond_count=cond_count, exclude_DAPI=True) 924 images_proc = sample_images_all_markers_all_lines(INPUT_DIR_BATCH_PROC, _sample_size_per_markers=sample_size_per_markers,#*2, --> 925 _num_markers=len(markers), raw=False, all_conds=True) 926 927 if dnls: /home/labs/hornsteinlab/Collaboration/MOmaps/src/common/lib/image_sampling_utils.py in sample_images_all_markers_all_lines(input_dir_batch, _sample_size_per_markers, _num_markers, raw, all_conds, rep_count, cond_count, exclude_DAPI, markers_to_include) 154 logging.info(f"\n\n[sample_images_all_markers_all_lines]: input_dir_batch:{input_dir_batch}, _sample_size_per_markers:{_sample_size_per_markers}, _num_markers:{_num_markers}") 155 --> 156 for cell_line in sorted(os.listdir(input_dir_batch)): 157 158 # get the full path of cell line images FileNotFoundError: [Errno 2] No such file or directory: '/home/labs/hornsteinlab/Collaboration/MOmaps/input/images/processed/spd2/SpinningDisk/FUS_lines_stress_2024ed/batch1'
# save notebook as HTML ( the HTML will be saved in the same folder the original script is)
from IPython.display import display, Javascript
# Trigger a checkpoint save from the browser so the exported HTML reflects the
# current cell outputs.
display(Javascript('IPython.notebook.save_checkpoint();'))
# NOTE(review): the exported filename says dNLS but this report processes FUS
# data — confirm the notebook path is correct.
os.system('jupyter nbconvert --to html src/preprocessing/notebooks/no_ds/qc_report_newPP_dNLS.ipynb')
[NbConvertApp] Converting notebook src/preprocessing/notebooks/no_ds/qc_report_newPP_dNLS.ipynb to html [NbConvertApp] Writing 16681096 bytes to src/preprocessing/notebooks/no_ds/qc_report_newPP_dNLS.html
0